#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/9/26 11:11
# @Author  : 王凯
# @File    : qingdao_grade.py
# @Project : spider-man
import datetime

import scrapy

from apps.creadit_grade_a.creadit_grade_a.items import NetCreditGradeAItem
from utils.tools import parse_url_params


class QingdaoGradeSpider(scrapy.Spider):
    """Spider for the taxpayer credit-grade listing on the Qingdao tax bureau site.

    Flow:
      1. ``start_requests`` fetches the landing page.
      2. ``parse_nd`` posts a first-page query for last year and the current year.
      3. ``parse_page`` emits the first page's items and schedules pages 2..N.
      4. ``parse_detail`` turns every row of a JSON response into an item.
    """

    name = "qingdao_grade"
    province = "青岛"
    url = "https://info.qingdao.chinatax.gov.cn/webPortals/page/nsxy.html"
    # Number of rows requested per page; sent as ``pageSize`` and used to
    # compute the page count.
    page_size = 10
    Request = scrapy.Request
    FormRequest = scrapy.FormRequest

    def _form_data(self, page, pjnd):
        """Build the POST form for one result page.

        :param page: 1-based page number.
        :param pjnd: value sent as the ``pjnd`` form field; passed through untouched.
        :return: dict suitable for ``FormRequest(formdata=...)``.
        """
        return {
            "pageNumber": f"{page}",
            "pageSize": f"{self.page_size}",
            "nsrsbh": "",
            "qymc": "",
            "swjg": "",
            "pjnd": pjnd,
        }

    def start_requests(self):
        """Request the landing page; its response triggers the per-year queries."""
        yield self.Request(self.url, callback=self.parse_nd)

    def parse_nd(self, response, **kwargs):
        """Issue the first-page query for last year and the current year."""
        url = "https://info.qingdao.chinatax.gov.cn/interact_p/Cxjk"
        # Read the clock once so both years come from the same instant.
        current_year = datetime.datetime.now().year
        for nd in (current_year - 1, current_year):
            yield self.FormRequest(
                url,
                formdata=self._form_data(1, f"{nd}"),
                method="POST",
                callback=self.parse_page,
            )

    def parse_page(self, response, **kwargs):
        """Emit items from the first page and schedule the remaining pages.

        The ``pjnd`` value is recovered from the request body that produced
        this response, so follow-up pages query the same year.
        """
        _, request_data = parse_url_params(response.request.body.decode())
        yield from self.parse_detail(response, **kwargs)

        rows = response.json().get("obj")
        if not rows:
            return

        raw_total = rows[0].get("totalnum")
        try:
            total_num = int(raw_total)
        except (TypeError, ValueError):
            # Without a usable total the page count is unknown; the first
            # page has already been emitted, so stop paginating here.
            self.logger.warning("unexpected totalnum %r from %s", raw_total, response.request.url)
            return

        # Ceiling division: an exact multiple of page_size must not produce
        # an extra, empty trailing page.
        total_page = (total_num + self.page_size - 1) // self.page_size
        for page in range(2, total_page + 1):
            yield self.FormRequest(
                response.request.url,
                formdata=self._form_data(page, request_data.get("pjnd")),
                method="POST",
                callback=self.parse_detail,
            )

    def parse_detail(self, response, **kwargs):
        """Yield one ``NetCreditGradeAItem`` per row in the response's ``obj`` list."""
        datas = response.json().get("obj")
        if not datas:
            return
        for data in datas:
            item = NetCreditGradeAItem()
            item.taxpayer_id = data.get("nsrsbh")
            item.company_name = data.get("nsrmc")
            item.year = data.get("timeymd")
            item.province = self.province
            yield item


def run():
    """Start the ``qingdao_grade`` crawl through Scrapy's command-line entry point."""
    from scrapy import cmdline

    # Equivalent to typing the command in a shell from the project directory.
    argv = "scrapy crawl qingdao_grade".split()
    cmdline.execute(argv)


# Allow launching the crawl by running this file directly as a script.
if __name__ == "__main__":
    run()
